clear
set more off
pause on

* Start a fresh text log for this run (close any log left open by an earlier run)
cap log close
log using merge-election.log, text replace

* reclink is a user-written command distributed through SSC.
* Install it only when it is not already found on the adopath, so the do file
* does not need network access on every run and does not stop when an
* installed copy is already present.
cap which reclink
if _rc {
	ssc install reclink
}

******************************************************************************************
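* Name: 	merge-election.do
* Purpose: 	links the cleaned CA local election files across years (1995-2014)
*			by fuzzy-matching on last name, first name and county (reclink)
* input:	ca-localYYYY.dta, one file per year YYYY = 1995, ..., 2014
* output:   ca-election-reclink.dta, merge-election.log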
******************************************************************************************

****************
* Reclink merge
****************

* Start the linked data set from the 1995 file and store year-stamped copies of
* the name and county fields, parallel to the LASTyyyy / FIRSTyyyy / CNTYNAMEyyyy
* variables that the merge loop creates for each later year.
use ca-local1995, clear
foreach field in FIRST LAST CNTYNAME {
	generate `field'1995 = `field'
}

* Working master identifier; it is passed to reclink through idmaster()
generate id = id1995
* For each later year: fuzzy-link the accumulated master data to that year's file,
* resolve duplicate links, append that year's unlinked records, refresh the working
* name/county variables and the master id, run uniqueness checks, and save.
forval x=1996/2014 {

	* Link master rows to the year file on LAST, FIRST and CNTYNAME.
	* id is the master identifier, idYYYY the using identifier, scoreYYYY receives the match score.
	* CNTYNAME is listed in required(); wnomatch() sets the non-match weights for the three variables.
	reclink LAST FIRST CNTYNAME using ca-local`x'.dta, idmaster(id) idusing(id`x') gen(score`x') wnomatch(15 5 20) required(CNTYNAME)
	
	* Rows sharing the same non-missing using id are several master rows linked to the
	* same using-file record. Keep only the row with the highest score for each using id.
	* NOTE(review): the original comment described this as multiple using observations merging
	* to one master observation, but the duplicates are detected on the using id.
	* NOTE(review): the losing rows are dropped from the data entirely, not just unlinked - confirm intended.
	duplicates tag id`x' if id`x'!=., g(tag_dup)
	* Highest score first within each using id
	* NOTE(review): relies on bys keeping the gsort order within id groups; ties in score are broken arbitrarily
	gsort id`x' -score`x'
	bys id`x': gen temp = _n
	* First listing shows every row involved in a duplicate link, second shows only the rows about to be dropped
	list LAST ULAST FIRST UFIRST CNTYNAME UCNTYNAME score`x' if tag_dup!=0 & id`x'!=.
	list LAST ULAST FIRST UFIRST CNTYNAME UCNTYNAME score`x' if tag_dup!=0 & id`x'!=. & temp!=1
	drop if tag_dup!=0 & id`x'!=. & temp!=1
	drop tag_dup temp

	* Bring in information from merging data set that didn't merge
	* The master copies are moved to *_temp so the merge below can load the using file's
	* LAST, FIRST and CNTYNAME under their own names.
	rename FIRST FIRST_temp
	rename LAST LAST_temp
	rename CNTYNAME CNTYNAME_temp
	* presumably the _merge variable left behind by reclink - verify
	drop _m
	
	* The merge below adds on the observations from the using data set that didn't have a match in the above merge
	merge m:1 id`x' using ca-local`x'.dta 
	tab _m
	drop _m
	
	* Replace main variables with those from the merging (next year's) data set, if empty
	* This prepares the main variables for the next round of merge by incorporating the next
	* year's observations that didn't merge
	foreach var in LAST FIRST CNTYNAME {
		* replace main variable with merging data set info, if main variable is empty
		replace `var'_temp = `var' if `var'_temp==""
		
		* preserve a record of merging names
		* the year file's value becomes the year-stamped variable; blanks are filled from the U-prefixed reclink copy
		* NOTE(review): U-prefixed variables from earlier iterations stay in the data; confirm reclink refreshes them each year
		rename `var' `var'`x'
		replace `var'`x' = U`var' if `var'`x'==""

		* main variable
		rename `var'_temp `var'		
	}
	
	
	* Report duplicate name/county combinations, then renumber the master id as the
	* row number in LAST FIRST CNTYNAME order for the next round
	duplicates report LAST FIRST CNTYNAME
	sort LAST FIRST CNTYNAME
	replace id=_n
		
	* Check 1: no two rows share the same LAST FIRST CNTYNAME; the script stops here otherwise
	duplicates tag LAST FIRST CNTYNAME, g(tag_dup)
	list if tag_dup!=0 
	assert tag_dup==0
	drop tag_dup
	
	* Check 2: every non-missing using id for this year appears on exactly one row
	duplicates tag id`x' if id`x'!=., g(tag_dup)
	list if tag_dup!=0 & id`x'!=.
	assert tag_dup==0 if id`x'!=.
	drop tag_dup

	* Check 3: master id is unique
	* NOTE(review): id was just reset to _n above, so this check cannot fail as written
	duplicates tag id if id!=., g(tag_dup)
	list if tag_dup!=0 & id!=.	
	assert tag_dup==0 if id!=.
	drop tag_dup

	* Save after every year, so the file on disk reflects the last completed iteration
	save ca-election-reclink, replace
}

****************
* Save
****************

* Write the final linked data set
save ca-election-reclink.dta, replace

* Diagnostic only: tabulate the row sum over all variables whose names start with "year",
* including missing values; the helper variable is removed again afterwards.
* NOTE(review): the year* variables come from the input files - confirm what they hold.
egen x = rowtotal(year*)
tabulate x, missing
drop x

* Close the log opened at the top of this do file so it is flushed and released.
* cap keeps this harmless if no log is open.
cap log close

